# NOTE(review): the CRAN package is named "ggplot2"; "ggplot" does not exist,
# which is exactly why the original call failed ("package 'ggplot' is not
# available for this version of R").  Guarded with requireNamespace() so the
# script does not try to re-install on every run.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
# Package loading.  The "##" lines below are the recorded console transcript
# (masking warnings) from the original run and are kept for reference.
library(e1071)
library(plotly)  # was `library ( plotly )` — spaces inside () are non-idiomatic
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(ggplot2)
library(caret)
## Loading required package: lattice
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(glue)
library(tidyverse)
## ── Attaching core tidyverse packages ───────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ lubridate 1.9.2 ✔ tibble 3.2.1
## ✔ purrr 1.0.1 ✔ tidyr 1.3.0
## ✔ readr 2.1.4
## ── Conflicts ─────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks plotly::filter(), stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ purrr::lift() masks caret::lift()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(scales)
##
## Attaching package: 'scales'
##
## The following object is masked from 'package:purrr':
##
## discard
##
## The following object is masked from 'package:readr':
##
## col_factor
library(traineR)
##
## Attaching package: 'traineR'
##
## The following object is masked from 'package:caret':
##
## contr.dummy
library(kknn)
##
## Attaching package: 'kknn'
##
## The following objects are masked from 'package:traineR':
##
## contr.dummy, contr.metric, contr.ordinal
##
## The following object is masked from 'package:caret':
##
## contr.dummy
# To reformat the document in RStudio: Ctrl + Shift + A
# ========================
# Ejercicio 1
# ========================
# Load the tumour data set.  stringsAsFactors = TRUE so character columns
# come in as factors (spelled out: `T` is a reassignable variable, not a
# reserved word).
datos <- read.csv(
  "tumores.csv",
  header = TRUE,
  sep = ",",
  dec = ".",
  stringsAsFactors = TRUE
)
# Make sure the response is a factor even if it was read as numeric 0/1.
datos$tipo <- factor(datos$tipo)
set.seed(123)  # reproducible partition
# createDataPartition returns stratified row indices; 25% of rows go to the
# TEST set, the remainder to the training set.
muestra <- createDataPartition(datos$tipo, p = 0.25, list = FALSE)
taprendizaje <- datos[-muestra, ]
ttesting <- datos[muestra, ]
accuracies.svm <- list()
kmax <- floor(sqrt(nrow(datos)))  # upper bound on k for the kNN models below

# Train one SVM per kernel and record the accuracy of the SECOND category
# (class "1").  The named vector maps the list key we store under to the
# kernel name passed to train.svm — this preserves the original keys exactly
# (note "poly" stores kernel "polynomial").  Replaces four copy-pasted
# train/predict/score stanzas from the original.
#
# Recorded results from the original run: linear gave class-1 accuracy
# 0.9762712; radial, polynomial and sigmoid all predicted every observation
# as class 1 (overall accuracy 0.9219, class-0 accuracy 0).
svm.kernels <- c(
  linear  = "linear",
  radial  = "radial",
  poly    = "polynomial",
  sigmoid = "sigmoid"
)
for (nombre in names(svm.kernels)) {
  modelo <- train.svm(tipo ~ ., data = taprendizaje,
                      kernel = svm.kernels[[nombre]])
  print(modelo)
  prediccion <- predict(modelo, ttesting, type = "class")
  MC <- confusion.matrix(ttesting, prediccion)
  index <- general.indexes(mc = MC)
  print(index)
  # NOTE(review): only the per-category accuracy of class "1" is kept, not the
  # overall accuracy.  A degenerate model that predicts everything as class 1
  # scores a perfect 1 here (as radial/poly/sigmoid did in the transcript) —
  # confirm this is the intended comparison metric.
  accuracies.svm[[nombre]] <- index$category.accuracy[[2]]
}
# kNN: fit one model per kernel supported by train.knn, again keeping only
# the class-"1" accuracy.  Replaces nine copy-pasted stanzas.  The named
# vector preserves the original (inconsistently cased) list keys so any
# downstream lookup by name behaves exactly as before.
#
# Best k selected per kernel in the original run: rectangular 1,
# triangular 3, epanechnikov 1, biweight 7, triweight 11, cos 3, inv 4,
# gaussian 6, optimal 1.
knn.kernels <- c(
  rectangular  = "rectangular",
  triangular   = "triangular",
  Epanechnikov = "epanechnikov",
  Biweight     = "biweight",
  Triweight    = "triweight",
  Cos          = "cos",
  Inv          = "inv",
  Gaussian     = "gaussian",
  Optimal      = "optimal"
)
accuracies.knn <- list()
for (nombre in names(knn.kernels)) {
  modelo <- train.knn(tipo ~ ., data = taprendizaje, kmax = kmax,
                      kernel = knn.kernels[[nombre]])
  print(modelo)  # shows the best k chosen for this kernel
  prediccion <- predict(modelo, ttesting, type = "class")
  MC <- confusion.matrix(ttesting, prediccion)
  index <- general.indexes(mc = MC)
  # As in the SVM section: class-"1" accuracy only.
  accuracies.knn[[nombre]] <- index$category.accuracy[[2]]
}
# Helper: names of the entries holding the maximum value in a named list of
# scalar accuracies.  Extracted because the original duplicated this logic
# verbatim for the kNN and SVM result lists.
modelos_maximos <- function(accuracies) {
  accs <- unlist(accuracies)
  names(accs)[accs == max(accs)]
}

# CON knn
accuracies.knn
## $rectangular
## [1] 0.9491525
##
## $triangular
## [1] 0.9491525
##
## $Epanechnikov
## [1] 0.9491525
##
## $Biweight
## [1] 0.9559322
##
## $Triweight
## [1] 0.9661017
##
## $Cos
## [1] 0.9491525
##
## $Inv
## [1] 0.9627119
##
## $Gaussian
## [1] 0.9728814
##
## $Optimal
## [1] 0.9491525
print(paste(
  "Modelos de precision maxima:",
  paste(modelos_maximos(accuracies.knn), collapse = ", ")
))
## [1] "Modelos de precision maxima: Gaussian"
# CON SVM
accuracies.svm
## $linear
## [1] 0.9762712
##
## $radial
## [1] 1
##
## $poly
## [1] 1
##
## $sigmoid
## [1] 1
print(paste(
  "Modelos de precision maxima:",
  paste(modelos_maximos(accuracies.svm), collapse = ", ")
))
## [1] "Modelos de precision maxima: radial, poly, sigmoid"
# NOTE(review): the SVM "wins" here come from models that predict only class 1
# (see transcripts above) — the conclusion below should be read with that in
# mind.  Typos in the printed message fixed.
print("Tal como podemos observar los resultados de svm son mejores en comparacion a los de k vecinos, lo que nos indica que es el modelo de precision maxima")
# ========================
# Ejercicio 2
# ========================
#' Compute binary-classification metrics from a 2x2 confusion matrix.
#'
#' Assumes the layout rows = actual, columns = predicted, with the NEGATIVE
#' class in row/column 1 and the POSITIVE class in row/column 2.
#' NOTE(review): the Titanic matrices passed to this function later in the
#' script are built with factor levels c(1, 0) — positive class FIRST — so
#' for those calls the "positiva"/"negativa" labels in the returned list are
#' swapped relative to their names.  Verify the intended orientation.
#'
#' @param matrizConfu A 2x2 numeric confusion matrix (or 2D table).
#' @return Named list: global accuracy and error, per-class accuracy
#'   (sensitivity/specificity), false positive/negative rates, and
#'   positive/negative predictive values ("asertividad").
ejercicio1 <- function(matrizConfu) {
  # Guard against anything other than a 2x2 matrix/table: the fixed [i, j]
  # indexing below is meaningless for other shapes.
  stopifnot(length(dim(matrizConfu)) == 2,
            all(dim(matrizConfu) == c(2, 2)))
  VN <- matrizConfu[1, 1]  # true negatives
  FN <- matrizConfu[2, 1]  # false negatives
  FP <- matrizConfu[1, 2]  # false positives
  VP <- matrizConfu[2, 2]  # true positives
  precision_global <- (VN + VP) / (VN + FP + FN + VP)
  list(
    precision_global     = precision_global,
    error_global         = 1 - precision_global,
    precision_positiva   = VP / (FN + VP),  # recall / sensitivity
    precision_negativa   = VN / (VN + FP),  # specificity
    falsos_positivos     = FP / (VN + FP),
    falsos_negativos     = FN / (FN + VP),
    asertividad_positiva = VP / (FP + VP),  # positive predictive value
    asertividad_negativa = VN / (VN + FN)   # negative predictive value
  )
}
# Read the Titanic data.  stringsAsFactors = TRUE (spelled out; `T` is a
# reassignable variable) so character columns become factors on load.
datos <- read.table("titanicV2020.csv", header = TRUE, sep = ",",
                    dec = ".", stringsAsFactors = TRUE)
# Set factor levels explicitly.  Survived puts the positive class (1) FIRST —
# this matters for how ejercicio1() labels its metrics (see note there).
datos$Survived <- factor(datos$Survived, levels = c(1, 0))
datos$Pclass <- factor(datos$Pclass, levels = c(1, 2, 3))
datos$Sex <- factor(datos$Sex, levels = c("female", "male"))
datos$Embarked <- factor(datos$Embarked, levels = c("C", "Q", "S"))
# "PassengerId", "Ticket", "Cabin" and "Name" are identifiers, not useful
# predictors, so keep only the modelling columns.
datos <- datos[, c("Survived", "Pclass", "Sex", "Age", "Fare", "Embarked")]
datos
# Drop rows with missing values (Age and Embarked contain NAs).
datos <- na.omit(datos)
# Treat any remaining integer columns as categorical.
columns <- sapply(datos, is.integer)
datos[columns] <- lapply(datos[columns], as.factor)
# 2.2 — 80/20 train/test split.
# NOTE(review): the original did not seed the RNG here, so the split (and all
# metrics derived from it) changed on every run; a seed is added for
# reproducibility, consistent with set.seed(123) used in Ejercicio 1.
set.seed(123)
muestra <- sample(nrow(datos), floor(0.8 * nrow(datos)))
taprendizaje <- datos[muestra, ]
ttesting <- datos[-muestra, ]
# 2.3 — SVM with a sigmoid kernel on the Titanic data.
# Changes vs original: the model variable was called `svm`, shadowing
# e1071::svm() — renamed to modelo.svm; a stray `datos` echo that re-printed
# the whole data frame was removed.
modelo.svm <- train.svm(Survived ~ ., data = taprendizaje, kernel = "sigmoid")
prediccion <- predict(modelo.svm, ttesting, type = "class")
MC1 <- confusion.matrix(ttesting, prediccion)
general.indexes(mc = MC1)
# Metrics from the custom helper.  NOTE(review): because Survived has levels
# c(1, 0), row/column 1 of MC1 is the POSITIVE class, so the
# "positiva"/"negativa" labels returned by ejercicio1() are swapped here
# (visible in the original transcript: precision_positiva matched the
# class-0 accuracy).
ejercicio1(MC1)
#2.4
# k-nearest neighbours (traineR) with kmax = floor(sqrt(n)), then its
# confusion matrix and quality indices on the hold-out set.
k.maximo <- floor(sqrt(nrow(datos)))
KNN <- train.knn(Survived ~ ., data = taprendizaje, kmax = k.maximo)
prediccion <- predict(KNN, ttesting, type = "class")
MC2 <- confusion.matrix(ttesting, prediccion)
print(general.indexes(mc = MC2))
##
## Confusion Matrix:
## prediction
## real 1 0
## 1 60 20
## 0 10 119
##
## Overall Accuracy: 0.8565
## Overall Error: 0.1435
##
## Category Accuracy:
##
## 1 0
## 0.750000 0.922481
# Weighted k-NN (kknn) with the same kmax as the knn model above.
modelo.kknn <- train.kknn(Survived ~ ., data = taprendizaje, kmax = floor(sqrt(nrow(datos))))
# ttesting[, -1] drops the first column — presumably the target Survived; verify.
prediccion <- predict(modelo.kknn, ttesting[, -1])
MC3 <- table(ttesting$Survived, prediccion)
# Compare the three models: one row of ejercicio1() metrics per model.
# Build all rows in a list and bind once, instead of growing `df` with
# three copy-pasted rbind() calls (quadratic growth anti-pattern).
matrices <- list(svm = MC1, knn = MC2, kknn = MC3)
filas <- lapply(names(matrices), function(nombre) {
  fila <- as.data.frame(ejercicio1(matrices[[nombre]]))
  fila$Modelo <- nombre
  fila
})
df <- do.call(rbind, filas)
df
## precision_global error_global precision_positiva precision_negativa
## 1 0.8133971 0.1866029 0.8217054 0.80
## 2 0.8564593 0.1435407 0.9224806 0.75
## 3 0.8564593 0.1435407 0.9224806 0.75
## falsos_positivos falsos_negativos asertividad_positiva asertividad_negativa
## 1 0.20 0.17829457 0.8688525 0.7356322
## 2 0.25 0.07751938 0.8561151 0.8571429
## 3 0.25 0.07751938 0.8561151 0.8571429
## Modelo
## 1 svm
## 2 knn
## 3 kknn
print("Podemos observar que tanto el metodo kknn como knn poseen los mismos valores,por esto podemos decir que se podría utilizar cualquiera de los dos metodos para realizar analisis")
## [1] "Podemos observar que tanto el metodo kknn como knn poseen los mismos valores,por esto podemos decir que se podría utilizar cualquiera de los dos metodos para realizar analisis"
# ========================
# Ejercicio 3
# ========================
# Ejercicio 3.1
# Show the working directory the file will be read from.
getwd()
## [1] "I:/Tarea7"
# setwd("/Users/AndresR/Documents/Tarea7")
# Load the ZIP digits data set (semicolon-separated, '.' decimal mark).
# NOTE(review): the object name `data` masks utils::data(); kept because the
# rest of the script refers to it. The original call also had a trailing
# comma after dec = '.', which passes an empty (missing) argument — removed.
data <-
  read.csv(
    'ZipData_2020.csv',
    header = TRUE,
    sep = ';',
    dec = '.'
  )
data
## Numero V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13
## 1 seis -1 -1 -1 -1.000 -1.000 -1.000 -1.000 -0.631 0.862 -0.167 -1.000 -1.000
## 2 cinco -1 -1 -1 -0.813 -0.671 -0.809 -0.887 -0.671 -0.853 -1.000 -1.000 -0.774
## 3 cuatro -1 -1 -1 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -0.996 0.147 1.000
## V14 V15 V16 V17 V18 V19 V20 V21 V22 V23 V24 V25 V26
## 1 -1.000 -1.000 -1.000 -1 -1 -1 -1 -1.000 -1 -1.000 -0.992 0.297 1.000
## 2 -0.180 0.052 -0.241 -1 -1 -1 -1 0.392 1 0.857 0.727 1.000 0.805
## 3 -0.189 -1.000 -1.000 -1 -1 -1 -1 -1.000 -1 -1.000 -1.000 -1.000 -1.000
## V27 V28 V29 V30 V31 V32 V33 V34 V35 V36 V37 V38 V39 V40 V41
## 1 0.307 -1.000 -1.00 -1.000 -1 -1.000 -1 -1 -1 -1 -1.000 -1 -1 -0.41 1
## 2 0.613 0.613 0.86 1.000 1 0.396 -1 -1 -1 -1 -0.548 1 1 1.00 1
## 3 -0.882 1.000 0.39 -0.811 -1 -1.000 -1 -1 -1 -1 -1.000 -1 -1 -1.00 -1
## V42 V43 V44 V45 V46 V47 V48 V49 V50 V51 V52 V53 V54
## 1 0.986 -0.565 -1 -1.000 -1 -1 -1.000 -1.000 -1 -1 -1 -1.000 -1.000
## 2 1.000 1.000 1 1.000 1 1 0.875 -0.957 -1 -1 -1 -0.786 0.961
## 3 -1.000 -0.715 1 0.029 -1 -1 -1.000 -1.000 -1 -1 -1 -1.000 -0.888
## V55 V56 V57 V58 V59 V60 V61 V62 V63 V64 V65 V66 V67
## 1 -0.683 0.825 1 0.562 -1.000 -1.000 -1.000 -1.000 -1.000 -1.00 -1 -1 -1
## 2 1.000 1.000 1 0.727 0.403 0.403 0.171 -0.314 -0.314 -0.94 -1 -1 -1
## 3 -0.912 -1.000 -1 -1.000 -0.549 1.000 0.361 -1.000 -1.000 -1.00 -1 -1 -1
## V68 V69 V70 V71 V72 V73 V74 V75 V76 V77 V78 V79 V80 V81
## 1 -1 -1.000 -0.938 0.540 1 0.778 -0.715 -1.000 -1 -1.000 -1 -1 -1 -1
## 2 -1 -0.298 1.000 1.000 1 0.440 0.056 -0.755 -1 -1.000 -1 -1 -1 -1
## 3 -1 -0.938 0.694 0.057 -1 -1.000 -1.000 -0.382 1 0.511 -1 -1 -1 -1
## V82 V83 V84 V85 V86 V87 V88 V89 V90 V91 V92 V93 V94 V95
## 1 -1 -1 -1 -1.000 0.1 1.000 0.922 -0.439 -1 -1.000 -1.000 -1.000 -1 -1
## 2 -1 -1 -1 0.366 1.0 1.000 1.000 1.000 1 0.889 -0.081 -0.920 -1 -1
## 3 -1 -1 -1 -0.311 1.0 -0.043 -1.000 -1.000 -1 -0.648 1.000 0.644 -1 -1
## V96 V97 V98 V99 V100 V101 V102 V103 V104 V105 V106 V107 V108
## 1 -1 -1 -1 -1 -1 -0.257 0.950 1.000 -0.162 -1.000 -1.00 -1.000 -0.987
## 2 -1 -1 -1 -1 -1 -0.396 0.886 0.974 0.851 0.851 0.95 1.000 1.000
## 3 -1 -1 -1 -1 -1 0.489 1.000 -0.493 -1.000 -1.000 -1.00 -0.564 1.000
## V109 V110 V111 V112 V113 V114 V115 V116 V117 V118 V119 V120 V121
## 1 -0.714 -0.832 -1 -1 -1 -1 -1 -0.797 0.909 1.000 0.300 -0.961 -1
## 2 0.539 -0.754 -1 -1 -1 -1 -1 -1.000 -1.000 -0.886 -0.505 -1.000 -1
## 3 0.693 -1.000 -1 -1 -1 -1 -1 -0.966 0.988 1.000 -0.893 -1.000 -1
## V122 V123 V124 V125 V126 V127 V128 V129 V130 V131 V132 V133 V134
## 1 -1.000 -0.550 0.485 0.996 0.867 0.092 -1 -1 -1 -1 0.278 1 0.877
## 2 -0.649 0.405 1.000 1.000 0.653 -0.838 -1 -1 -1 -1 -1.000 -1 -1.000
## 3 -1.000 -0.397 1.000 0.903 -0.977 -1.000 -1 -1 -1 -1 -0.559 1 1.000
## V135 V136 V137 V138 V139 V140 V141 V142 V143 V144 V145 V146
## 1 -0.824 -1 -0.905 0.145 0.977 1.00 1.000 1.000 0.990 -0.745 -1 -1.00
## 2 -1.000 -1 -1.000 -1.000 -1.000 -0.55 0.993 1.000 0.618 -0.869 -1 -0.96
## 3 -0.297 -1 -1.000 -1.000 -0.611 1.00 0.873 -0.698 -0.552 -1.000 -1 -1.00
## V147 V148 V149 V150 V151 V152 V153 V154 V155 V156 V157
## 1 -0.950 0.847 1.000 0.327 -1.000 -1.000 0.355 1 0.655 -0.109 -0.185
## 2 -0.512 0.134 -0.343 -0.796 -1.000 -1.000 -1.000 -1 -1.000 -1.000 -0.432
## 3 -1.000 -0.126 1.000 1.000 0.766 -0.764 -1.000 -1 -0.577 1.000 0.933
## V158 V159 V160 V161 V162 V163 V164 V165 V166 V167 V168 V169
## 1 1.000 0.988 -0.723 -1 -1.000 -0.63 1.000 1.000 0.068 -0.925 0.113 0.960
## 2 0.994 1.000 0.223 -1 0.426 1.00 1.000 1.000 0.214 -1.000 -1.000 -1.000
## 3 0.484 -0.197 -1.000 -1 -1.000 -1.00 -0.818 -0.355 0.334 1.000 0.868 -0.289
## V170 V171 V172 V173 V174 V175 V176 V177 V178 V179 V180
## 1 0.308 -0.884 -1 -0.075 1.000 0.641 -0.995 -1.00 -1.000 -0.677 1.000
## 2 -1.000 -1.000 -1 -1.000 0.292 1.000 0.967 -0.88 0.449 1.000 0.896
## 3 -0.677 -0.596 1 1.000 1.000 -0.581 -1.000 -1.00 -1.000 -1.000 -1.000
## V181 V182 V183 V184 V185 V186 V187 V188 V189 V190 V191 V192
## 1 1.000 0.753 0.341 1 0.707 -0.942 -1 -1 0.545 1.000 0.027 -1
## 2 -0.094 -0.750 -1.000 -1 -1.000 -1.000 -1 -1 -1.000 -0.627 1.000 1
## 3 -1.000 -0.954 0.118 1 1.000 1.000 1 1 0.973 -0.092 -0.995 -1
## V193 V194 V195 V196 V197 V198 V199 V200 V201 V202 V203
## 1 -1.000 -1.000 -0.903 0.792 1 1.000 1.000 1.000 0.536 0.184 0.812
## 2 0.198 -0.105 1.000 1.000 1 0.639 -0.168 -0.314 -0.446 -1.000 -1.000
## 3 -1.000 -1.000 -1.000 -1.000 -1 -1.000 -0.993 -0.464 0.046 0.290 0.457
## V204 V205 V206 V207 V208 V209 V210 V211 V212 V213 V214 V215
## 1 0.837 0.978 0.864 -0.630 -1 -1.000 -1.000 -1.000 -0.452 0.828 1 1
## 2 -0.999 -0.337 0.147 0.996 1 0.667 -0.808 0.065 0.993 1.000 1 1
## 3 1.000 0.721 -1.000 -1.000 -1 -1.000 -1.000 -1.000 -1.000 -1.000 -1 -1
## V216 V217 V218 V219 V220 V221 V222 V223 V224 V225 V226 V227 V228
## 1 1 1.000 1.00 1.000 1.00 1.000 0.135 -1 -1 -1.000 -1 -1 -1.00
## 2 1 0.996 0.97 0.970 0.97 0.998 1.000 1 1 0.109 -1 -1 -0.83
## 3 -1 -1.000 -1.00 -0.426 1.00 0.555 -1.000 -1 -1 -1.000 -1 -1 -1.00
## V229 V230 V231 V232 V233 V234 V235 V236 V237 V238 V239 V240 V241
## 1 -0.483 0.813 1.0 1 1 1 1.000 1 0.219 -0.943 -1 -1.000 -1.00
## 2 -0.242 0.350 0.8 1 1 1 1.000 1 1.000 1.000 1 0.616 -0.93
## 3 -1.000 -1.000 -1.0 -1 -1 -1 0.024 1 0.388 -1.000 -1 -1.000 -1.00
## V242 V243 V244 V245 V246 V247 V248 V249 V250 V251 V252 V253
## 1 -1 -1 -1 -1 -0.974 -0.429 0.304 0.823 1.000 0.482 -0.474 -0.991
## 2 -1 -1 -1 -1 -1.000 -0.858 -0.671 -0.671 -0.033 0.761 0.762 0.126
## 3 -1 -1 -1 -1 -1.000 -1.000 -1.000 -1.000 -1.000 -0.109 1.000 -0.179
## V254 V255 V256 V257
## 1 -1.000 -1.000 -1.000 -1
## 2 -0.095 -0.671 -0.828 -1
## 3 -1.000 -1.000 -1.000 -1
## [ reached 'max' / getOption("max.print") -- omitted 9295 rows ]
# Ejercicio 3.2
# Bar chart showing how balanced the target variable is.
#
# datos             - data frame containing the data.
# variable.predecir - name (string) of the column to predict; must exist in datos
#                     and be character or factor.
# ylab, xlab, main  - axis labels and plot title.
# col               - optional vector of bar colours; defaults to ggplot-like hues.
# Returns a ggplot object with one bar per class; stops with an error otherwise.
equilibrio.variable.predecir <- function(datos, variable.predecir, ylab = "Cantidad de individuos",
                                         xlab = "", main = paste("Distribución de la variable", variable.predecir), col = NA) {
  # Emulate ggplot2's default discrete colour palette.
  gg_color <- function(n) {
    hues <- seq(15, 375, length = n + 1)
    hcl(h = hues, l = 65, c = 100)[1:n]
  }
  # Use short-circuit `||` (the original `|` evaluated the %in% test even when
  # the argument was missing, which itself raised an error).
  if (missing(variable.predecir) || !(variable.predecir %in% colnames(datos))) {
    stop("variable.predecir tiene que ser ingresada y ser un nombre de columna", call. = FALSE)
  }
  if (is.character(datos[, variable.predecir]) || is.factor(datos[, variable.predecir])) {
    n.clases <- length(unique(datos[, variable.predecir]))
    # all(is.na(col)): a bare is.na(col) inside || errors for a colour vector
    # of length > 1 in R >= 4.2.
    if (length(col) == 0 || all(is.na(col))) {
      col <- gg_color(n.clases)
    } else {
      col <- rep(col, n.clases)
    }
    # aes_string() is deprecated since ggplot2 3.0; use the .data pronoun.
    ggplot(data = datos, mapping = aes(x = .data[[variable.predecir]], fill = .data[[variable.predecir]])) +
      geom_bar() +
      scale_fill_manual(values = col, name = variable.predecir) +
      labs(x = xlab, y = ylab, title = main) +
      theme_minimal() +
      theme(legend.position = "bottom")
  } else {
    # Original message contained the garbled fragment "de V2 factor".
    stop("La variable a predecir tiene que ser de tipo factor o character", call. = FALSE)
  }
}
# Quality indices for an NxN confusion matrix MC.
# Returns a named list with: the matrix itself, global accuracy
# (sum of the diagonal over the total), global error, and the
# per-category accuracy (diagonal over row totals).
indices.general <- function(MC) {
  total <- sum(MC)
  aciertos <- sum(diag(MC))
  precision.global <- aciertos / total
  precision.categoria <- diag(MC) / rowSums(MC)
  res <- list(MC, precision.global, 1 - precision.global, precision.categoria)
  names(res) <- c("Matriz de Confusión", "Precisión Global", "Error Global",
                  "Precisión por categoría")
  res
}
equilibrio.variable.predecir(data,"Numero")
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Fix the RNG seed so the train/validation split is reproducible.
set.seed(123)
# The target must be a factor for svm() to do classification.
data$Numero <- as.factor(data$Numero)
# Draw 80% of the rows for training; the remaining 20% is the validation set.
idx.entrenamiento <- sample(nrow(data), 0.8 * nrow(data))
taprendizaje <- data[idx.entrenamiento, ]
ttesting <- data[-idx.entrenamiento, ]
# Radial-kernel SVM (e1071) and its confusion matrix on the hold-out set.
modelo <- svm(Numero ~ ., data = taprendizaje, kernel = "radial")
prediccion <- predict(modelo, ttesting)
MC <- table(ttesting$Numero, prediccion)
# Prediction quality indices.
indices.general(MC)
## $`Matriz de Confusión`
## prediccion
## cero cinco cuatro dos nueve ocho seis siete tres uno
## cero 290 0 0 3 0 0 0 0 0 0
## cinco 2 134 0 2 1 1 0 0 3 0
## cuatro 0 0 185 2 0 1 0 1 0 0
## dos 0 0 2 184 0 2 0 1 2 1
## nueve 0 2 1 0 153 1 0 3 0 0
## ocho 2 0 1 2 0 128 0 1 2 0
## seis 2 1 0 2 0 0 153 0 0 0
## siete 0 0 2 0 5 1 0 147 0 0
## tres 0 1 0 3 0 0 0 2 177 0
## uno 0 0 1 0 0 1 0 0 0 249
##
## $`Precisión Global`
## [1] 0.9677419
##
## $`Error Global`
## [1] 0.03225806
##
## $`Precisión por categoría`
## cero cinco cuatro dos nueve ocho seis siete
## 0.9897611 0.9370629 0.9788360 0.9583333 0.9562500 0.9411765 0.9683544 0.9483871
## tres uno
## 0.9672131 0.9920319
# 0.9677419*100
# round(96.77419,2)
# 0.9920319*100
# round(99.20319,2)
# Interpretation of the exercise 3 results (printed so it shows in the knitted output).
print(
"Los resultados del modelo pueden ser observados en la salida de índices.general, donde se encuentran valores como el de la precisión global y la matriz de confusión. La precisión del modelo es de 96.77%, la cual es relativamente alta, lo que deriva en menos errores o confusiones en las predicciones de los datos. De igual manera que puede observar que en variables como la del 1 esta es del 99.2%, lo que demuestra un alto valor de predictibilidad. La matriz de confusión muestra el número de predicciones correctas e incorrectas para cada categoría.
"
)
## [1] "Los resultados del modelo pueden ser observados en la salida de índices.general, donde se encuentran valores como el de la precisión global y la matriz de confusión. La precisión del modelo es de 96.77%, la cual es relativamente alta, lo que deriva en menos errores o confusiones en las predicciones de los datos. De igual manera que puede observar que en variables como la del 1 esta es del 99.2%, lo que demuestra un alto valor de predictibilidad. La matriz de confusión muestra el número de predicciones correctas e incorrectas para cada categoría.\n"
print(
"Se puede llegar a la conclusión de que en general los datos obtenidos son buenos, a razón de que la precisión general del modelo es alta y las precisiones de cada categoría son superiores al 90 %, algunas incluso, siendo cercanas a 100, como lo es el caso del 1,4 y 0, con valores de 0.9920319 , 0.9788360 y 0.9897611 respectivamente. Quizá se posible el realizar mas pruebas con diferentes valores y evaluar diferentes modelos para observar de manera más completa los resultados del modelo."
)
## [1] "Se puede llegar a la conclusión de que en general los datos obtenidos son buenos, a razón de que la precisión general del modelo es alta y las precisiones de cada categoría son superiores al 90 %, algunas incluso, siendo cercanas a 100, como lo es el caso del 1,4 y 0, con valores de 0.9920319 , 0.9788360 y 0.9897611 respectivamente. Quizá se posible el realizar mas pruebas con diferentes valores y evaluar diferentes modelos para observar de manera más completa los resultados del modelo."
# Ejercicio 3.2 — comparison with results from previous assignments.
print(
"
En comparacion a tareas anteriores los datos y formas de emplearlos son basicamente las mismas,aunque en comparacion a los calculos anteriores se dieron algunos cambios,por ejemplo,el cuatro ahora posee un valor de 98,mientras que anteriormente tenia un valor de 95. Tambien es posible recalcar que tanto el 1 como el 0 en ambas tareas son los valores mas altos.
"
)
## [1] "\nEn comparacion a tareas anteriores los datos y formas de emplearlos son basicamente las mismas,aunque en comparacion a los calculos anteriores se dieron algunos cambios,por ejemplo,el cuatro ahora posee un valor de 98,mientras que anteriormente tenia un valor de 95. Tambien es posible recalcar que tanto el 1 como el 0 en ambas tareas son los valores mas altos.\n"
# ========================
# Ejercicio 4
# ========================
# Exercise 4: nine 3-D observations with their classes.
# Note the class labels deliberately keep the surrounding spaces
# (" Rojo ", " Azul ") exactly as in the original data entry.
datos <- data.frame(
  x = c(1, 1, 1, 3, 1, 3, 1, 3, 1),
  y = c(0, 0, 1, 1, 1, 2, 2, 2, 1),
  z = c(1, 2, 2, 4, 3, 3, 1, 1, 0),
  # first five observations are red, the last four blue
  clase = rep(c(" Rojo ", " Azul "), times = c(5, 4))
)
# 3-D scatter plot of the points coloured by class.
plot_ly(data = datos) %>%
  add_trace(x = ~x, y = ~y, z = ~z, color = ~clase,
            colors = c("#0C4B8E", "#BF382A"),
            mode = "markers", type = "scatter3d")
# Convert the class to a factor so svm() performs classification.
datos$clase<-as.factor(datos$clase)
# Ejercicio 4.2 — linear SVM; recover the hyperplane w.x - rho = 0 from the model.
modelo <- svm(clase ~ ., data = datos, kernel = "linear", cost = 10)
vs <- modelo$SV  # support vectors (in the model's internal coordinates)
coef <- t(modelo$coefs) %*% modelo$SV  # w = sum(alpha_i * y_i * x_i)
a <- -coef[1] / coef[3]
b <- -coef[2] / coef[3]
d <- -modelo$rho / coef[3]
# NOTE(review): a, b and d are already divided by coef[3]; the z expression
# below divides by coef[3] again, which looks like a double division — the
# plane height should presumably be z = a*x + b*y - d. Confirm intent.
# NOTE(review): svm() scales inputs by default (scale = TRUE), so w lives in
# scaled coordinates; drawing it against the raw x/y/z may be off. Verify.
plot_ly(data = datos) %>%
add_trace(x = ~x, y = ~y, z = ~z, color = ~clase,
colors = c("#0C4B8E", "#BF382A"),
mode = "markers", type="scatter3d") %>%
add_trace(x = c(0, 3), y = c(0, 2), z = (-d - a * c(0, 3) - b * c(0, 2)) / coef[3],
mode = "lines", line = list(color = "black", width = 2))
## No trace type specified:
## Based on info supplied, a 'scatter3d' trace seems appropriate.
## Read more about this trace type -> https://plotly.com/r/reference/#scatter3d
# 4.3 — classification rule read off the separating plane (printed answers)
print("Una regla podría ser")
## [1] "Una regla podría ser"
print("Se clasifica como Rojo si 1.5x - 0.5y - z + 1 > 0")
## [1] "Se clasifica como Rojo si 1.5x - 0.5y - z + 1 > 0"
print("se clasifica como Azul en otro caso")
## [1] "se clasifica como Azul en otro caso"
# 4.4 — margin and support vectors (printed answers)
print("El margen es la distancia menor entre las distancias de ambos vectores.")
## [1] "El margen es la distancia menor entre las distancias de ambos vectores."
print("los vectores de soporte son los puntos (1,0,1) y (3,2,0)")
## [1] "los vectores de soporte son los puntos (1,0,1) y (3,2,0)"
# 4.5 — effect of moving a non-support observation (printed answer)
print("un ligero movimiento de la octava observacion no afectaria el hiperplano de margen maximo a razon de que este es practicamente insenbile a las pequeñas variaciones en datos que no involucren los vectores de soporte")
## [1] "un ligero movimiento de la octava observacion no afectaria el hiperplano de margen maximo a razon de que este es practicamente insenbile a las pequeñas variaciones en datos que no involucren los vectores de soporte"
# 4.7 — scatter plot plus a line segment lying in the plane x = 2
# (presumably meant as a hyperplane that does not separate the classes — verify)
plot_ly(data = datos) %>%
add_trace(x = ~x, y = ~y, z = ~z, color = ~clase,
colors = c("#0C4B8E", "#BF382A"),
mode = "markers", type="scatter3d") %>%
add_trace(x = c(2, 2), y = c(0, 2), z = c(0, 4),
mode = "lines", line = list(color = "black", width = 2))
## No trace type specified:
## Based on info supplied, a 'scatter3d' trace seems appropriate.
## Read more about this trace type -> https://plotly.com/r/reference/#scatter3d
# 4.8 — add a new observation and assign it to the red class.
print("se agrega un punto en la coordenada (2,1.5,2) y se asigna a la clase Roja")
## [1] "se agrega un punto en la coordenada (2,1.5,2) y se asigna a la clase Roja"
# Use the exact existing factor level " Rojo " (with surrounding spaces, as
# entered above): the original used "Rojo", which silently introduced a third
# factor level / class in the plot instead of enlarging the red class.
datos <- rbind(datos, data.frame(x = 2, y = 1.5, z = 2, clase = " Rojo "))
plot_ly(data = datos) %>%
  add_trace(x = ~x, y = ~y, z = ~z, color = ~clase,
            colors = c("#0C4B8E", "#BF382A"),
            mode = "markers", type = "scatter3d")